Evaluating all models on dataset: Gorilla Test
questions = read.csv("data/gorilla_test_continuous.csv", sep=" ")
Evaluate 00_bag_of_words on Gorilla Test
results = read.csv("output/00_bag_of_words_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.05660 -0.03318 -0.02829 0.02295 0.88373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.028293 0.003242 8.727 < 2e-16 ***
## questions$y 0.028306 0.008437 3.355 0.000845 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0574 on 586 degrees of freedom
## Multiple R-squared: 0.01885, Adjusted R-squared: 0.01717
## F-statistic: 11.26 on 1 and 586 DF, p-value: 0.0008445
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n00_bag_of_words\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 on Gorilla Test
results = read.csv("output/01_harmony_paraphrase-multilingual-MiniLM-L12-v2_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24806 -0.08598 -0.01575 0.08159 0.51494
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.290288 0.006659 43.592 < 2e-16 ***
## questions$y 0.104806 0.017330 6.048 2.62e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1179 on 586 degrees of freedom
## Multiple R-squared: 0.05875, Adjusted R-squared: 0.05714
## F-statistic: 36.58 on 1 and 586 DF, p-value: 2.618e-09
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n01_harmony_paraphrase-multilingual-MiniLM-L12-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 02_harmony_distiluse-base-multilingual-cased-v2 on Gorilla Test
results = read.csv("output/02_harmony_distiluse-base-multilingual-cased-v2_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26998 -0.09691 -0.01865 0.08191 0.65351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.17920 0.00741 24.182 < 2e-16 ***
## questions$y 0.11730 0.01929 6.082 2.14e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1312 on 586 degrees of freedom
## Multiple R-squared: 0.05938, Adjusted R-squared: 0.05778
## F-statistic: 37 on 1 and 586 DF, p-value: 2.137e-09
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n02_harmony_distiluse-base-multilingual-cased-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 03_harmony_stsb-xlm-r-multilingual on Gorilla Test
results = read.csv("output/03_harmony_stsb-xlm-r-multilingual_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34730 -0.08803 -0.00613 0.09131 0.48369
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.323896 0.007581 42.722 < 2e-16 ***
## questions$y 0.119809 0.019730 6.072 2.27e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1342 on 586 degrees of freedom
## Multiple R-squared: 0.0592, Adjusted R-squared: 0.0576
## F-statistic: 36.87 on 1 and 586 DF, p-value: 2.265e-09
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n03_harmony_stsb-xlm-r-multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 04_harmony_paraphrase-multilingual-mpnet-base-v2 on Gorilla Test
results = read.csv("output/04_harmony_paraphrase-multilingual-mpnet-base-v2_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28904 -0.08231 -0.00774 0.07656 0.49546
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.304029 0.006401 47.495 < 2e-16 ***
## questions$y 0.133727 0.016658 8.028 5.46e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1133 on 586 degrees of freedom
## Multiple R-squared: 0.09907, Adjusted R-squared: 0.09754
## F-statistic: 64.44 on 1 and 586 DF, p-value: 5.459e-15
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n04_harmony_paraphrase-multilingual-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 05_harmony_all-mpnet-base-v2 on Gorilla Test
results = read.csv("output/05_harmony_all-mpnet-base-v2_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27184 -0.07691 -0.00316 0.06680 0.46247
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.220283 0.006029 36.537 <2e-16 ***
## questions$y 0.134042 0.015690 8.543 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1068 on 586 degrees of freedom
## Multiple R-squared: 0.1108, Adjusted R-squared: 0.1092
## F-statistic: 72.99 on 1 and 586 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n05_harmony_all-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 10_jose on Gorilla Test
results = read.csv("output/10_jose_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26459 -0.08976 -0.01490 0.08245 0.48942
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.161909 0.007257 22.31 <2e-16 ***
## questions$y 0.205255 0.018885 10.87 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1285 on 586 degrees of freedom
## Multiple R-squared: 0.1678, Adjusted R-squared: 0.1664
## F-statistic: 118.1 on 1 and 586 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n10_jose\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 11_raafi on Gorilla Test
results = read.csv("output/11_raafi_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31973 -0.10799 -0.01814 0.09352 0.50840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.151555 0.007839 19.33 <2e-16 ***
## questions$y 0.227928 0.020401 11.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1388 on 586 degrees of freedom
## Multiple R-squared: 0.1756, Adjusted R-squared: 0.1742
## F-statistic: 124.8 on 1 and 586 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n11_raafi\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 20_openai_text-embedding-ada-002 on Gorilla Test
results = read.csv("output/20_openai_text-embedding-ada-002_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.064331 -0.022306 -0.004568 0.018511 0.165382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.778004 0.001788 435.046 < 2e-16 ***
## questions$y 0.038280 0.004654 8.225 1.26e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03167 on 586 degrees of freedom
## Multiple R-squared: 0.1035, Adjusted R-squared: 0.102
## F-statistic: 67.66 on 1 and 586 DF, p-value: 1.26e-15
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n20_openai_text-embedding-ada-002\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 21_openai_text-embedding-3-large on Gorilla Test
results = read.csv("output/21_openai_text-embedding-3-large_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.22722 -0.06424 -0.00892 0.05395 0.60928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.226149 0.005228 43.258 <2e-16 ***
## questions$y 0.130319 0.013605 9.579 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09257 on 586 degrees of freedom
## Multiple R-squared: 0.1354, Adjusted R-squared: 0.1339
## F-statistic: 91.76 on 1 and 586 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n21_openai_text-embedding-3-large\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 40_google_vertex_ai_gecko on Gorilla Test
results = read.csv("output/40_google_vertex_ai_gecko_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.127154 -0.029805 -0.002913 0.030491 0.172600
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.755533 0.002473 305.48 < 2e-16 ***
## questions$y 0.035719 0.006436 5.55 4.34e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04379 on 586 degrees of freedom
## Multiple R-squared: 0.04993, Adjusted R-squared: 0.04831
## F-statistic: 30.8 on 1 and 586 DF, p-value: 4.344e-08
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n40_google_vertex_ai_gecko\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 41_google_vertex_ai_gecko_multilingual on Gorilla Test
results = read.csv("output/41_google_vertex_ai_gecko_multilingual_model_on_gorilla_test_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.071753 -0.016015 0.000942 0.017731 0.091082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.853988 0.001439 593.317 < 2e-16 ***
## questions$y 0.026230 0.003746 7.003 6.91e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02549 on 586 degrees of freedom
## Multiple R-squared: 0.07722, Adjusted R-squared: 0.07564
## F-statistic: 49.04 on 1 and 586 DF, p-value: 6.91e-12
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Test: predictions of model\n41_google_vertex_ai_gecko_multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluating all models on dataset: Gorilla Train
questions = read.csv("data/gorilla_train_continuous.csv", sep=" ")
Evaluate 00_bag_of_words on Gorilla Train
results = read.csv("output/00_bag_of_words_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.04638 -0.03354 -0.03053 0.02411 0.38695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.030531 0.001304 23.406 < 2e-16 ***
## questions$y 0.015849 0.003431 4.619 4.07e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0458 on 2349 degrees of freedom
## Multiple R-squared: 0.009, Adjusted R-squared: 0.008578
## F-statistic: 21.33 on 1 and 2349 DF, p-value: 4.068e-06
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n00_bag_of_words\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 on Gorilla Train
results = read.csv("output/01_harmony_paraphrase-multilingual-MiniLM-L12-v2_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29258 -0.08957 -0.01122 0.07723 0.55788
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.294845 0.003465 85.09 <2e-16 ***
## questions$y 0.117966 0.009115 12.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1217 on 2349 degrees of freedom
## Multiple R-squared: 0.06656, Adjusted R-squared: 0.06616
## F-statistic: 167.5 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n01_harmony_paraphrase-multilingual-MiniLM-L12-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 02_harmony_distiluse-base-multilingual-cased-v2 on Gorilla Train
results = read.csv("output/02_harmony_distiluse-base-multilingual-cased-v2_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28036 -0.10809 -0.02037 0.08638 0.62556
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.18286 0.00392 46.65 <2e-16 ***
## questions$y 0.11835 0.01031 11.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1376 on 2349 degrees of freedom
## Multiple R-squared: 0.05311, Adjusted R-squared: 0.05271
## F-statistic: 131.8 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n02_harmony_distiluse-base-multilingual-cased-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 03_harmony_stsb-xlm-r-multilingual on Gorilla Train
results = read.csv("output/03_harmony_stsb-xlm-r-multilingual_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38340 -0.09545 -0.00419 0.09316 0.54619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.339511 0.003985 85.196 <2e-16 ***
## questions$y 0.092303 0.010483 8.805 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1399 on 2349 degrees of freedom
## Multiple R-squared: 0.03195, Adjusted R-squared: 0.03154
## F-statistic: 77.53 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n03_harmony_stsb-xlm-r-multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 04_harmony_paraphrase-multilingual-mpnet-base-v2 on Gorilla Train
results = read.csv("output/04_harmony_paraphrase-multilingual-mpnet-base-v2_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31752 -0.08669 -0.00906 0.07643 0.48324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.311959 0.003406 91.59 <2e-16 ***
## questions$y 0.130843 0.008959 14.60 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1196 on 2349 degrees of freedom
## Multiple R-squared: 0.08324, Adjusted R-squared: 0.08285
## F-statistic: 213.3 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n04_harmony_paraphrase-multilingual-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 05_harmony_all-mpnet-base-v2 on Gorilla Train
results = read.csv("output/05_harmony_all-mpnet-base-v2_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30851 -0.08115 -0.01038 0.06955 0.48684
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.228753 0.003278 69.78 <2e-16 ***
## questions$y 0.133164 0.008624 15.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1151 on 2349 degrees of freedom
## Multiple R-squared: 0.09215, Adjusted R-squared: 0.09177
## F-statistic: 238.4 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n05_harmony_all-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 10_jose on Gorilla Train
results = read.csv("output/10_jose_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32708 -0.08633 -0.01909 0.07063 0.51640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.137731 0.003534 38.98 <2e-16 ***
## questions$y 0.305037 0.009296 32.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1241 on 2349 degrees of freedom
## Multiple R-squared: 0.3143, Adjusted R-squared: 0.314
## F-statistic: 1077 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n10_jose\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 11_raafi on Gorilla Train
results = read.csv("output/11_raafi_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31143 -0.07633 -0.01795 0.07008 0.54754
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.096383 0.003245 29.70 <2e-16 ***
## questions$y 0.427096 0.008537 50.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.114 on 2349 degrees of freedom
## Multiple R-squared: 0.5159, Adjusted R-squared: 0.5157
## F-statistic: 2503 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n11_raafi\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 20_openai_text-embedding-ada-002 on Gorilla Train
results = read.csv("output/20_openai_text-embedding-ada-002_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.084409 -0.024983 -0.004173 0.021639 0.140441
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7787366 0.0009703 802.61 <2e-16 ***
## questions$y 0.0424163 0.0025523 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03407 on 2349 degrees of freedom
## Multiple R-squared: 0.1052, Adjusted R-squared: 0.1048
## F-statistic: 276.2 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n20_openai_text-embedding-ada-002\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 21_openai_text-embedding-3-large on Gorilla Train
results = read.csv("output/21_openai_text-embedding-3-large_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24942 -0.07476 -0.00977 0.06011 0.55280
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.229794 0.002858 80.41 <2e-16 ***
## questions$y 0.140477 0.007517 18.69 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1003 on 2349 degrees of freedom
## Multiple R-squared: 0.1294, Adjusted R-squared: 0.1291
## F-statistic: 349.2 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n21_openai_text-embedding-3-large\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 40_google_vertex_ai_gecko on Gorilla Train
results = read.csv("output/40_google_vertex_ai_gecko_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.137242 -0.032080 -0.001186 0.032612 0.159676
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.755104 0.001295 582.96 <2e-16 ***
## questions$y 0.043516 0.003407 12.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04548 on 2349 degrees of freedom
## Multiple R-squared: 0.06493, Adjusted R-squared: 0.06453
## F-statistic: 163.1 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n40_google_vertex_ai_gecko\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 41_google_vertex_ai_gecko_multilingual on Gorilla Train
results = read.csv("output/41_google_vertex_ai_gecko_multilingual_model_on_gorilla_train_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.076036 -0.018178 -0.000117 0.018602 0.096765
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.855249 0.000750 1140.34 <2e-16 ***
## questions$y 0.025297 0.001973 12.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02634 on 2349 degrees of freedom
## Multiple R-squared: 0.06541, Adjusted R-squared: 0.06502
## F-statistic: 164.4 on 1 and 2349 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: Gorilla Train: predictions of model\n41_google_vertex_ai_gecko_multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluating all models on dataset: McElroy 2024 Cosine Correlation
questions = read.csv("data/mcelroy_2024_cosine_correlation_continuous.csv", sep=" ")
Evaluate 00_bag_of_words on McElroy 2024 Cosine Correlation
results = read.csv("output/00_bag_of_words_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08143 -0.05149 -0.00142 0.03111 0.35602
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.03422 0.01947 -1.758 0.0792 .
## questions$y 0.13502 0.03001 4.498 7.95e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0614 on 739 degrees of freedom
## Multiple R-squared: 0.02665, Adjusted R-squared: 0.02534
## F-statistic: 20.24 on 1 and 739 DF, p-value: 7.949e-06
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n00_bag_of_words\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 on McElroy 2024 Cosine Correlation
results = read.csv("output/01_harmony_paraphrase-multilingual-MiniLM-L12-v2_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31863 -0.07984 0.00036 0.06857 0.44146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.19429 0.03743 -5.191 2.7e-07 ***
## questions$y 0.90291 0.05770 15.647 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.118 on 739 degrees of freedom
## Multiple R-squared: 0.2489, Adjusted R-squared: 0.2478
## F-statistic: 244.8 on 1 and 739 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n01_harmony_paraphrase-multilingual-MiniLM-L12-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 02_harmony_distiluse-base-multilingual-cased-v2 on McElroy 2024 Cosine Correlation
results = read.csv("output/02_harmony_distiluse-base-multilingual-cased-v2_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29003 -0.08933 -0.01011 0.07986 0.51659
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.03942 0.04051 -0.973 0.331
## questions$y 0.63494 0.06246 10.165 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1278 on 739 degrees of freedom
## Multiple R-squared: 0.1227, Adjusted R-squared: 0.1215
## F-statistic: 103.3 on 1 and 739 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n02_harmony_distiluse-base-multilingual-cased-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 03_harmony_stsb-xlm-r-multilingual on McElroy 2024 Cosine Correlation
results = read.csv("output/03_harmony_stsb-xlm-r-multilingual_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34905 -0.08641 -0.00505 0.08387 0.42588
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.12061 0.04084 -2.954 0.00324 **
## questions$y 0.82406 0.06296 13.089 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1288 on 739 degrees of freedom
## Multiple R-squared: 0.1882, Adjusted R-squared: 0.1871
## F-statistic: 171.3 on 1 and 739 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n03_harmony_stsb-xlm-r-multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 04_harmony_paraphrase-multilingual-mpnet-base-v2 on McElroy 2024 Cosine Correlation
results = read.csv("output/04_harmony_paraphrase-multilingual-mpnet-base-v2_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30249 -0.07517 -0.00465 0.07130 0.37057
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.19729 0.03615 -5.458 6.57e-08 ***
## questions$y 0.92976 0.05573 16.684 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.114 on 739 degrees of freedom
## Multiple R-squared: 0.2736, Adjusted R-squared: 0.2726
## F-statistic: 278.4 on 1 and 739 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n04_harmony_paraphrase-multilingual-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 05_harmony_all-mpnet-base-v2 on McElroy 2024 Cosine Correlation
results = read.csv("output/05_harmony_all-mpnet-base-v2_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep=" ")
results$y = questions$y
model = lm(results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27683 -0.07964 -0.00329 0.07186 0.39762
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.33308 0.03534 -9.425 <2e-16 ***
## questions$y 1.02645 0.05449 18.839 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1115 on 739 degrees of freedom
## Multiple R-squared: 0.3244, Adjusted R-squared: 0.3235
## F-statistic: 354.9 on 1 and 739 DF, p-value: < 2.2e-16
ggplot(results, aes(x = y, y = y_pred) ) + geom_point() + geom_smooth(method = "lm") + ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n05_harmony_all-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 10_jose on McElroy 2024 Cosine Correlation
# Read the space-separated output file for model 10_jose.
results <- read.csv(
  file = "output/10_jose_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = " "
)
# Copy the y column of `questions` (defined outside this block) into
# `results`, alongside y_pred.
results$y <- questions$y
# Fit y_pred as a linear function of questions$y and print the fit summary.
model <- lm(formula = results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33345 -0.09822 -0.01273 0.09032 0.39168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.42997 0.04220 -10.19 <2e-16 ***
## questions$y 1.17787 0.06507 18.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1331 on 739 degrees of freedom
## Multiple R-squared: 0.3072, Adjusted R-squared: 0.3063
## F-statistic: 327.7 on 1 and 739 DF, p-value: < 2.2e-16
# Scatter of y_pred against y, overlaid with an "lm" smoothing line.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n10_jose\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 11_raafi on McElroy 2024 Cosine Correlation
# Read the space-separated output file for model 11_raafi.
results <- read.csv(
  file = "output/11_raafi_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = " "
)
# Copy the y column of `questions` (defined outside this block) into
# `results`, alongside y_pred.
results$y <- questions$y
# Fit y_pred as a linear function of questions$y and print the fit summary.
model <- lm(formula = results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38330 -0.10893 -0.00846 0.09813 0.46741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.35469 0.04667 -7.599 9.04e-14 ***
## questions$y 1.03463 0.07196 14.378 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1472 on 739 degrees of freedom
## Multiple R-squared: 0.2186, Adjusted R-squared: 0.2175
## F-statistic: 206.7 on 1 and 739 DF, p-value: < 2.2e-16
# Scatter of y_pred against y, overlaid with an "lm" smoothing line.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n11_raafi\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 20_openai_text-embedding-ada-002 on McElroy 2024 Cosine
Correlation
# Read the space-separated output file for model
# 20_openai_text-embedding-ada-002.
results <- read.csv(
  file = "output/20_openai_text-embedding-ada-002_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = " "
)
# Copy the y column of `questions` (defined outside this block) into
# `results`, alongside y_pred.
results$y <- questions$y
# Fit y_pred as a linear function of questions$y and print the fit summary.
model <- lm(formula = results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.088251 -0.018355 -0.000054 0.016758 0.096320
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.653339 0.008471 77.12 <2e-16 ***
## questions$y 0.275569 0.013060 21.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02672 on 739 degrees of freedom
## Multiple R-squared: 0.3759, Adjusted R-squared: 0.3751
## F-statistic: 445.2 on 1 and 739 DF, p-value: < 2.2e-16
# Scatter of y_pred against y, overlaid with an "lm" smoothing line.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n20_openai_text-embedding-ada-002\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 21_openai_text-embedding-3-large on McElroy 2024 Cosine
Correlation
# Read the space-separated output file for model
# 21_openai_text-embedding-3-large.
results <- read.csv(
  file = "output/21_openai_text-embedding-3-large_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = " "
)
# Copy the y column of `questions` (defined outside this block) into
# `results`, alongside y_pred.
results$y <- questions$y
# Fit y_pred as a linear function of questions$y and print the fit summary.
model <- lm(formula = results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20141 -0.05750 -0.00543 0.04633 0.37589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.13862 0.02705 -5.124 3.82e-07 ***
## questions$y 0.82011 0.04171 19.664 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08531 on 739 degrees of freedom
## Multiple R-squared: 0.3435, Adjusted R-squared: 0.3426
## F-statistic: 386.7 on 1 and 739 DF, p-value: < 2.2e-16
# Scatter of y_pred against y, overlaid with an "lm" smoothing line.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n21_openai_text-embedding-3-large\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 40_google_vertex_ai_gecko on McElroy 2024 Cosine
Correlation
# Read the space-separated output file for model 40_google_vertex_ai_gecko.
results <- read.csv(
  file = "output/40_google_vertex_ai_gecko_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = " "
)
# Copy the y column of `questions` (defined outside this block) into
# `results`, alongside y_pred.
results$y <- questions$y
# Fit y_pred as a linear function of questions$y and print the fit summary.
model <- lm(formula = results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.094162 -0.022628 -0.000911 0.019889 0.116381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.61764 0.01047 59.01 <2e-16 ***
## questions$y 0.30045 0.01614 18.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03301 on 739 degrees of freedom
## Multiple R-squared: 0.3193, Adjusted R-squared: 0.3184
## F-statistic: 346.6 on 1 and 739 DF, p-value: < 2.2e-16
# Scatter of y_pred against y, overlaid with an "lm" smoothing line.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n40_google_vertex_ai_gecko\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 41_google_vertex_ai_gecko_multilingual on McElroy 2024
Cosine Correlation
# Read the space-separated output file for model
# 41_google_vertex_ai_gecko_multilingual.
results <- read.csv(
  file = "output/41_google_vertex_ai_gecko_multilingual_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = " "
)
# Copy the y column of `questions` (defined outside this block) into
# `results`, alongside y_pred.
results$y <- questions$y
# Fit y_pred as a linear function of questions$y and print the fit summary.
model <- lm(formula = results$y_pred ~ questions$y)
summary(model)
##
## Call:
## lm(formula = results$y_pred ~ questions$y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.081837 -0.013984 0.000495 0.014695 0.071735
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.751519 0.007332 102.50 <2e-16 ***
## questions$y 0.204931 0.011304 18.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02312 on 739 degrees of freedom
## Multiple R-squared: 0.3078, Adjusted R-squared: 0.3069
## F-statistic: 328.6 on 1 and 739 DF, p-value: < 2.2e-16
# Scatter of y_pred against y, overlaid with an "lm" smoothing line.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n41_google_vertex_ai_gecko_multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'
